In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline


import warnings
with warnings.catch_warnings():
                                           warnings.filterwarnings("ignore")
from warnings import simplefilter
simplefilter(action='ignore',category=FutureWarning)
In [2]:
from google.colab import files
uploaded=files.upload()
Upload widget is only available when the cell has been executed in the current browser session. Please rerun this cell to enable.
Saving House data.csv to House data.csv
In [3]:
from google.colab import files
uploaded=files.upload()
Upload widget is only available when the cell has been executed in the current browser session. Please rerun this cell to enable.
Saving test.csv to test.csv
In [4]:
from google.colab import files
uploaded=files.upload()
Upload widget is only available when the cell has been executed in the current browser session. Please rerun this cell to enable.
Saving sample_submission.csv to sample_submission.csv
In [168]:
train=pd.read_csv("House data.csv")
test=pd.read_csv("test.csv")
In [6]:
print(train)
        Id  MSSubClass MSZoning  LotFrontage  LotArea Street Alley LotShape  \
0        1          60       RL         65.0     8450   Pave   NaN      Reg   
1        2          20       RL         80.0     9600   Pave   NaN      Reg   
2        3          60       RL         68.0    11250   Pave   NaN      IR1   
3        4          70       RL         60.0     9550   Pave   NaN      IR1   
4        5          60       RL         84.0    14260   Pave   NaN      IR1   
...    ...         ...      ...          ...      ...    ...   ...      ...   
1455  1456          60       RL         62.0     7917   Pave   NaN      Reg   
1456  1457          20       RL         85.0    13175   Pave   NaN      Reg   
1457  1458          70       RL         66.0     9042   Pave   NaN      Reg   
1458  1459          20       RL         68.0     9717   Pave   NaN      Reg   
1459  1460          20       RL         75.0     9937   Pave   NaN      Reg   

     LandContour Utilities  ... PoolArea PoolQC  Fence MiscFeature MiscVal  \
0            Lvl    AllPub  ...        0    NaN    NaN         NaN       0   
1            Lvl    AllPub  ...        0    NaN    NaN         NaN       0   
2            Lvl    AllPub  ...        0    NaN    NaN         NaN       0   
3            Lvl    AllPub  ...        0    NaN    NaN         NaN       0   
4            Lvl    AllPub  ...        0    NaN    NaN         NaN       0   
...          ...       ...  ...      ...    ...    ...         ...     ...   
1455         Lvl    AllPub  ...        0    NaN    NaN         NaN       0   
1456         Lvl    AllPub  ...        0    NaN  MnPrv         NaN       0   
1457         Lvl    AllPub  ...        0    NaN  GdPrv        Shed    2500   
1458         Lvl    AllPub  ...        0    NaN    NaN         NaN       0   
1459         Lvl    AllPub  ...        0    NaN    NaN         NaN       0   

     MoSold YrSold  SaleType  SaleCondition  SalePrice  
0         2   2008        WD         Normal     208500  
1         5   2007        WD         Normal     181500  
2         9   2008        WD         Normal     223500  
3         2   2006        WD        Abnorml     140000  
4        12   2008        WD         Normal     250000  
...     ...    ...       ...            ...        ...  
1455      8   2007        WD         Normal     175000  
1456      2   2010        WD         Normal     210000  
1457      5   2010        WD         Normal     266500  
1458      4   2010        WD         Normal     142125  
1459      6   2008        WD         Normal     147500  

[1460 rows x 81 columns]
In [169]:
train_original=train.copy()
test_original=test.copy()
In [7]:
print(test)
        Id  MSSubClass MSZoning  LotFrontage  LotArea Street Alley LotShape  \
0     1461          20       RH         80.0    11622   Pave   NaN      Reg   
1     1462          20       RL         81.0    14267   Pave   NaN      IR1   
2     1463          60       RL         74.0    13830   Pave   NaN      IR1   
3     1464          60       RL         78.0     9978   Pave   NaN      IR1   
4     1465         120       RL         43.0     5005   Pave   NaN      IR1   
...    ...         ...      ...          ...      ...    ...   ...      ...   
1454  2915         160       RM         21.0     1936   Pave   NaN      Reg   
1455  2916         160       RM         21.0     1894   Pave   NaN      Reg   
1456  2917          20       RL        160.0    20000   Pave   NaN      Reg   
1457  2918          85       RL         62.0    10441   Pave   NaN      Reg   
1458  2919          60       RL         74.0     9627   Pave   NaN      Reg   

     LandContour Utilities  ... ScreenPorch PoolArea PoolQC  Fence  \
0            Lvl    AllPub  ...         120        0    NaN  MnPrv   
1            Lvl    AllPub  ...           0        0    NaN    NaN   
2            Lvl    AllPub  ...           0        0    NaN  MnPrv   
3            Lvl    AllPub  ...           0        0    NaN    NaN   
4            HLS    AllPub  ...         144        0    NaN    NaN   
...          ...       ...  ...         ...      ...    ...    ...   
1454         Lvl    AllPub  ...           0        0    NaN    NaN   
1455         Lvl    AllPub  ...           0        0    NaN    NaN   
1456         Lvl    AllPub  ...           0        0    NaN    NaN   
1457         Lvl    AllPub  ...           0        0    NaN  MnPrv   
1458         Lvl    AllPub  ...           0        0    NaN    NaN   

     MiscFeature MiscVal MoSold  YrSold  SaleType  SaleCondition  
0            NaN       0      6    2010        WD         Normal  
1           Gar2   12500      6    2010        WD         Normal  
2            NaN       0      3    2010        WD         Normal  
3            NaN       0      6    2010        WD         Normal  
4            NaN       0      1    2010        WD         Normal  
...          ...     ...    ...     ...       ...            ...  
1454         NaN       0      6    2006        WD         Normal  
1455         NaN       0      4    2006        WD        Abnorml  
1456         NaN       0      9    2006        WD        Abnorml  
1457        Shed     700      7    2006        WD         Normal  
1458         NaN       0     11    2006        WD         Normal  

[1459 rows x 80 columns]
In [9]:
train.isnull().sum()[train.isnull().sum()>0]
Out[9]:
0
LotFrontage 259
Alley 1369
MasVnrType 872
MasVnrArea 8
BsmtQual 37
BsmtCond 37
BsmtExposure 38
BsmtFinType1 37
BsmtFinType2 38
Electrical 1
FireplaceQu 690
GarageType 81
GarageYrBlt 81
GarageFinish 81
GarageQual 81
GarageCond 81
PoolQC 1453
Fence 1179
MiscFeature 1406

In [10]:
train.dtypes
Out[10]:
0
Id int64
MSSubClass int64
MSZoning object
LotFrontage float64
LotArea int64
... ...
MoSold int64
YrSold int64
SaleType object
SaleCondition object
SalePrice int64

81 rows × 1 columns


In [11]:
train.shape
Out[11]:
(1460, 81)
In [12]:
train['SalePrice'].value_counts()
Out[12]:
count
SalePrice
140000 20
135000 17
145000 14
155000 14
190000 13
... ...
223000 1
257000 1
282922 1
193879 1
137450 1

663 rows × 1 columns


In [13]:
train['SalePrice'].value_counts(normalize=True)
Out[13]:
proportion
SalePrice
140000 0.013699
135000 0.011644
145000 0.009589
155000 0.009589
190000 0.008904
... ...
223000 0.000685
257000 0.000685
282922 0.000685
193879 0.000685
137450 0.000685

663 rows × 1 columns


In [14]:
train['SalePrice'].value_counts(normalize=True).plot.bar(figsize=(5,5),title='SalePrice')
Out[14]:
<Axes: title={'center': 'SalePrice'}, xlabel='SalePrice'>
No description has been provided for this image
In [15]:
train['MSSubClass'].value_counts(normalize=True).plot.bar(title='MSSubClass')
Out[15]:
<Axes: title={'center': 'MSSubClass'}, xlabel='MSSubClass'>
No description has been provided for this image
In [18]:
train['MSZoning'].value_counts(normalize=True).plot.bar()
Out[18]:
<Axes: xlabel='MSZoning'>
No description has been provided for this image
In [19]:
plt.figure(1)
plt.subplot(111)
train['LotFrontage'].value_counts(normalize=True).plot.box()
Out[19]:
<Axes: >
No description has been provided for this image
In [20]:
plt.figure(1)
plt.subplot(111)
sns.distplot(train['LotArea'])
Out[20]:
<Axes: xlabel='LotArea', ylabel='Density'>
No description has been provided for this image
In [21]:
train['Street'].value_counts(normalize=True).plot.bar(figsize=(5,5),title='Street')
Out[21]:
<Axes: title={'center': 'Street'}, xlabel='Street'>
No description has been provided for this image
In [22]:
train['Alley'].value_counts(normalize=True).plot.bar(figsize=(5,5),title='Alley')
Out[22]:
<Axes: title={'center': 'Alley'}, xlabel='Alley'>
No description has been provided for this image
In [23]:
train['LotShape'].value_counts(normalize=True).plot.bar(figsize=(5,5),title='LotShape')
Out[23]:
<Axes: title={'center': 'LotShape'}, xlabel='LotShape'>
No description has been provided for this image
In [24]:
train['LandContour'].value_counts(normalize=True).plot.bar()
Out[24]:
<Axes: xlabel='LandContour'>
No description has been provided for this image
In [25]:
train['Utilities'].value_counts(normalize=True).plot.bar(figsize=(5,5),title='Utilities')
Out[25]:
<Axes: title={'center': 'Utilities'}, xlabel='Utilities'>
No description has been provided for this image
In [26]:
train['LotConfig'].value_counts(normalize=True).plot.bar(figsize=(5,5),title='LotConfig')
Out[26]:
<Axes: title={'center': 'LotConfig'}, xlabel='LotConfig'>
No description has been provided for this image
In [27]:
train['LandSlope'].value_counts(normalize=True).plot.bar(figsize=(5,5),title='LandSlope')
Out[27]:
<Axes: title={'center': 'LandSlope'}, xlabel='LandSlope'>
No description has been provided for this image
In [28]:
train['Neighborhood'].value_counts(normalize=True).plot.bar(figsize=(5,5),title='Neighborhood')
Out[28]:
<Axes: title={'center': 'Neighborhood'}, xlabel='Neighborhood'>
No description has been provided for this image
In [29]:
train['Condition1'].value_counts(normalize=True).plot.bar(figsize=(5,5),title='Condition1')
Out[29]:
<Axes: title={'center': 'Condition1'}, xlabel='Condition1'>
No description has been provided for this image
In [30]:
train['Condition2'].value_counts(normalize=True).plot.bar(figsize=(5,5),title='Condition2')
Out[30]:
<Axes: title={'center': 'Condition2'}, xlabel='Condition2'>
No description has been provided for this image
In [32]:
train['BldgType'].value_counts(normalize=True).plot.bar(figsize=(5,5),title='Bldgtype')
Out[32]:
<Axes: title={'center': 'Bldgtype'}, xlabel='BldgType'>
No description has been provided for this image
In [33]:
train['HouseStyle'].value_counts(normalize=True).plot.bar(figsize=(5,5),title='HouseStyle')
Out[33]:
<Axes: title={'center': 'HouseStyle'}, xlabel='HouseStyle'>
No description has been provided for this image
In [34]:
train['OverallQual'].value_counts(normalize=True).plot.bar(figsize=(5,5),title='OverallQual')
Out[34]:
<Axes: title={'center': 'OverallQual'}, xlabel='OverallQual'>
No description has been provided for this image
In [36]:
train['FullBath'].value_counts(normalize=True).plot.bar(figsize=(5,5),title='FullBath')
Out[36]:
<Axes: title={'center': 'FullBath'}, xlabel='FullBath'>
No description has been provided for this image
In [37]:
train['HalfBath'].value_counts(normalize=True).plot.bar(figsize=(5,5),title='HalfBath')
Out[37]:
<Axes: title={'center': 'HalfBath'}, xlabel='HalfBath'>
No description has been provided for this image
In [38]:
train['PoolArea'].value_counts(normalize=True).plot.bar(figsize=(5,5),title='PoolArea')
Out[38]:
<Axes: title={'center': 'PoolArea'}, xlabel='PoolArea'>
No description has been provided for this image
In [41]:
train['SaleCondition'].value_counts(normalize=True).plot.bar(figsize=(5,5),title='SaleCondition')
Out[41]:
<Axes: title={'center': 'SaleCondition'}, xlabel='SaleCondition'>
No description has been provided for this image
In [44]:
train['SaleType'].value_counts(normalize=True).plot.bar(figsize=(5,5),title='SaleType')
Out[44]:
<Axes: title={'center': 'SaleType'}, xlabel='SaleType'>
No description has been provided for this image
In [45]:
MSZoning=pd.crosstab(train['MSZoning'],train['SalePrice'])
MSZoning.div(MSZoning.sum(1).astype(float),axis=0).plot(kind='bar',stacked=True,figsize=(5,5))
Out[45]:
<Axes: xlabel='MSZoning'>
No description has been provided for this image
In [46]:
MSSubClass=pd.crosstab(train['MSSubClass'],train['SalePrice'])
MSSubClass.div(MSSubClass.sum(1).astype(float),axis=0).plot(kind='bar',stacked=True,figsize=(5,5))
Out[46]:
<Axes: xlabel='MSSubClass'>
No description has been provided for this image
In [47]:
SaleType=pd.crosstab(train['SaleType'],train['SalePrice'])
SaleType.div(SaleType.sum(1).astype(float),axis=0).plot(kind='bar',stacked=True,figsize=(5,5))
Out[47]:
<Axes: xlabel='SaleType'>
No description has been provided for this image
In [48]:
SaleCondition=pd.crosstab(train['SaleCondition'],train['SalePrice'])
SaleCondition.div(SaleCondition.sum(1).astype(float),axis=0).plot(kind='bar',stacked=True,figsize=(5,5))
Out[48]:
<Axes: xlabel='SaleCondition'>
No description has been provided for this image
In [49]:
PoolArea=pd.crosstab(train['PoolArea'],train['SalePrice'])
PoolArea.div(PoolArea.sum(1).astype(float),axis=0).plot(kind='bar',stacked=True,figsize=(5,5))
Out[49]:
<Axes: xlabel='PoolArea'>
No description has been provided for this image
In [50]:
Electrical=pd.crosstab(train['Electrical'],train['SalePrice'])
Electrical.div(Electrical.sum(1).astype(float),axis=0).plot(kind='bar',stacked=True,figsize=(5,5))
Out[50]:
<Axes: xlabel='Electrical'>
No description has been provided for this image
In [51]:
HouseStyle=pd.crosstab(train['HouseStyle'],train['SalePrice'])
HouseStyle.div(HouseStyle.sum(1).astype(float),axis=0).plot(kind='bar',stacked=True,figsize=(5,5))
Out[51]:
<Axes: xlabel='HouseStyle'>
No description has been provided for this image
In [53]:
Heating=pd.crosstab(train['Heating'],train['SalePrice'])
Heating.div(Heating.sum(1).astype(float),axis=0).plot(kind='bar',stacked=True,figsize=(5,5))
Out[53]:
<Axes: xlabel='Heating'>
No description has been provided for this image
In [55]:
train.groupby('SalePrice')['PoolArea'].mean().plot.bar()
Out[55]:
<Axes: xlabel='SalePrice'>
No description has been provided for this image
In [56]:
bins=[0,2500,4000,6000,81000]
group=['Low','Average','High','VeryHigh']
train['Area_bin']=pd.cut(train['LotArea'],bins,labels=group)
Area_bin=pd.crosstab(train['Area_bin'],train['SalePrice'])
Area_bin.div(Area_bin.sum(1).astype(float),axis=0).plot(kind='bar',stacked=True,figsize=(5,5))
plt.xlabel('LotArea')
plt.ylabel('Percentage')
Out[56]:
Text(0, 0.5, 'Percentage')
No description has been provided for this image
In [57]:
train=train.drop(['Area_bin'],axis=1)
In [131]:
train.isnull().sum()[train.isnull().sum()>0]
Out[131]:
0
LotFrontage 259
Alley 1369
MasVnrType 872
MasVnrArea 8
BsmtQual 37
BsmtCond 37
BsmtExposure 38
BsmtFinType1 37
BsmtFinType2 38
Electrical 1
FireplaceQu 690
GarageType 81
GarageYrBlt 81
GarageFinish 81
GarageQual 81
GarageCond 81
PoolQC 1453
Fence 1179
MiscFeature 1406

In [132]:
train.dtypes[train.isnull().sum()>0]
Out[132]:
0
LotFrontage float64
Alley object
MasVnrType object
MasVnrArea float64
BsmtQual object
BsmtCond object
BsmtExposure object
BsmtFinType1 object
BsmtFinType2 object
Electrical object
FireplaceQu object
GarageType object
GarageYrBlt float64
GarageFinish object
GarageQual object
GarageCond object
PoolQC object
Fence object
MiscFeature object

In [170]:
train['LotFrontage'].fillna(train['LotFrontage'].mode()[0],inplace=True)
train['Alley'].fillna(train['Alley'].mode()[0],inplace=True)
train['MasVnrType'].fillna(train['MasVnrType'].mode()[0],inplace=True)
train['MasVnrArea'].fillna(train['MasVnrArea'].mode()[0],inplace=True)
train['BsmtQual'].fillna(train['BsmtQual'].mode()[0],inplace=True)
train['BsmtCond'].fillna(train['BsmtCond'].mode()[0],inplace=True)
train['BsmtExposure'].fillna(train['BsmtExposure'].mode()[0],inplace=True)
train['BsmtFinType1'].fillna(train['BsmtFinType1'].mode()[0],inplace=True)
train['BsmtFinType2'].fillna(train['BsmtFinType2'].mode()[0],inplace=True)
train['Electrical'].fillna(train['Electrical'].mode()[0],inplace=True)
train['FireplaceQu'].fillna(train['FireplaceQu'].mode()[0],inplace=True)
train['GarageType'].fillna(train['GarageType'].mode()[0],inplace=True)
train['GarageYrBlt'].fillna(train['GarageYrBlt'].mode()[0],inplace=True)
train['GarageQual'].fillna(train['GarageQual'].mode()[0],inplace=True)
train['GarageCond'].fillna(train['GarageCond'].mode()[0],inplace=True)
train['PoolQC'].fillna(train['PoolQC'].mode()[0],inplace=True)
train['Fence'].fillna(train['Fence'].mode()[0],inplace=True)
train['MiscFeature'].fillna(train['MiscFeature'].mode()[0],inplace=True)
train['GarageFinish'].fillna(train['GarageFinish'].mode()[0],inplace=True)
In [158]:
train.isnull().sum()
Out[158]:
0
Id 0
MSSubClass 0
MSZoning 0
LotFrontage 0
LotArea 0
... ...
MoSold 0
YrSold 0
SaleType 0
SaleCondition 0
SalePrice 0

81 rows × 1 columns


In [147]:
test.isnull().sum()[test.isnull().sum()>0]
Out[147]:
0
MSZoning 4
LotFrontage 227
Alley 1352
Utilities 2
Exterior1st 1
Exterior2nd 1
MasVnrType 894
MasVnrArea 15
BsmtQual 44
BsmtCond 45
BsmtExposure 44
BsmtFinType1 42
BsmtFinSF1 1
BsmtFinType2 42
BsmtFinSF2 1
BsmtUnfSF 1
TotalBsmtSF 1
BsmtFullBath 2
BsmtHalfBath 2
KitchenQual 1
Functional 2
FireplaceQu 730
GarageType 76
GarageYrBlt 78
GarageFinish 78
GarageCars 1
GarageArea 1
GarageQual 78
GarageCond 78
PoolQC 1456
Fence 1169
MiscFeature 1408
SaleType 1

In [171]:
test['LotFrontage'].fillna(test['LotFrontage'].mode()[0],inplace=True)
test['MSZoning'].fillna(test['MSZoning'].mode()[0],inplace=True)
test['Alley'].fillna(test['Alley'].mode()[0],inplace=True)
test['Utilities'].fillna(test['Utilities'].mode()[0],inplace=True)
test['Exterior1st'].fillna(test['Exterior1st'].mode()[0],inplace=True)
test['Exterior2nd'].fillna(test['Exterior2nd'].mode()[0],inplace=True)
test['BsmtFinSF1'].fillna(test['BsmtFinSF1'].mode()[0],inplace=True)
test['BsmtFinSF2'].fillna(test['BsmtFinSF2'].mode()[0],inplace=True)
test['BsmtUnfSF'].fillna(test['BsmtUnfSF'].mode()[0],inplace=True)
test['TotalBsmtSF'].fillna(test['TotalBsmtSF'].mode()[0],inplace=True)

test['MasVnrType'].fillna(test['MasVnrType'].mode()[0],inplace=True)
test['MasVnrArea'].fillna(test['MasVnrArea'].mode()[0],inplace=True)
test['BsmtQual'].fillna(test['BsmtQual'].mode()[0],inplace=True)
test['BsmtCond'].fillna(test['BsmtCond'].mode()[0],inplace=True)
test['BsmtExposure'].fillna(test['BsmtExposure'].mode()[0],inplace=True)
test['BsmtFinType1'].fillna(test['BsmtFinType1'].mode()[0],inplace=True)
test['BsmtFinType2'].fillna(test['BsmtFinType2'].mode()[0],inplace=True)

test['FireplaceQu'].fillna(test['FireplaceQu'].mode()[0],inplace=True)
test['GarageType'].fillna(test['GarageType'].mode()[0],inplace=True)
test['GarageYrBlt'].fillna(test['GarageYrBlt'].mode()[0],inplace=True)
test['GarageQual'].fillna(test['GarageQual'].mode()[0],inplace=True)
test['GarageCond'].fillna(test['GarageCond'].mode()[0],inplace=True)
test['GarageCars'].fillna(test['GarageCars'].mode()[0],inplace=True)
test['GarageArea'].fillna(test['GarageArea'].mode()[0],inplace=True)

test['SaleType'].fillna(test['SaleType'].mode()[0],inplace=True)
test['Fence'].fillna(test['Fence'].mode()[0],inplace=True)
test['PoolQC'].fillna(test['PoolQC'].mode()[0],inplace=True)

test['MiscFeature'].fillna(test['MiscFeature'].mode()[0],inplace=True)
test['GarageFinish'].fillna(test['GarageFinish'].mode()[0],inplace=True)

test['Functional'].fillna(test['Functional'].mode()[0],inplace=True)
test['BsmtFullBath'].fillna(test['BsmtFullBath'].mode()[0],inplace=True)
test['BsmtHalfBath'].fillna(test['BsmtHalfBath'].mode()[0],inplace=True)
test['KitchenQual'].fillna(test['KitchenQual'].mode()[0],inplace=True)
In [149]:
test.isnull().sum()
Out[149]:
0
Id 0
MSSubClass 0
MSZoning 0
LotFrontage 0
LotArea 0
... ...
MiscVal 0
MoSold 0
YrSold 0
SaleType 0
SaleCondition 0

80 rows × 1 columns


In [172]:
train=train.drop('Id',axis=1)
test=test.drop('Id',axis=1)

X=train.drop('SalePrice',axis=1)
Y=train.SalePrice

# Apply one-hot encoding separately
X = pd.get_dummies(X)
test = pd.get_dummies(test)

# Align columns - add missing columns to test set with fill_value=0
test_aligned = test.reindex(columns=X.columns, fill_value=0)
In [151]:
fig=plt.figure()
ax=plt.subplot(111)
cax=ax.matshow(train.corr(),vmin=1,vmax=1)
fig.colorbar(cax)
plt.show()
No description has been provided for this image
In [173]:
from sklearn.model_selection import train_test_split
x_train,x_cv,y_train,y_cv=train_test_split(X,Y,test_size=0.3)
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
model=LinearRegression()
model.fit(x_train,y_train)
LinearRegression()
Out[173]:
LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
LinearRegression()
In [174]:
pred_cv=model.predict(x_cv)
mse=mean_squared_error(y_cv,pred_cv)
In [175]:
pred_test=model.predict(test_aligned)
In [176]:
submission=pd.read_csv("sample_submission.csv")
In [177]:
submission['SalePrice']=pred_test
submission['Id']=test_original['Id']
In [178]:
pd.DataFrame(submission,columns=['Id','SalePrice']).to_csv("linear.csv")
In [ ]:
from google.colab import files
files.download("linear.csv")
In [180]:
from sklearn.model_selection import train_test_split
x_train,x_cv,y_train,y_cv=train_test_split(X,Y,test_size=0.3)
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error
model=KNeighborsRegressor()
model.fit(x_train,y_train)
KNeighborsRegressor()
Out[180]:
KNeighborsRegressor()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
KNeighborsRegressor()
In [181]:
pred_cv=model.predict(x_cv)
mse=mean_squared_error(y_cv,pred_cv)
In [182]:
pred_test=model.predict(test_aligned)
In [183]:
submission['SalePrice']=pred_test
submission['Id']=test_original['Id']
In [184]:
pd.DataFrame(submission,columns=['Id','SalePrice']).to_csv("KNN.csv")
In [185]:
from sklearn.model_selection import train_test_split
x_train,x_cv,y_train,y_cv=train_test_split(X,Y,test_size=0.3)
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
model=SVR()
model.fit(x_train,y_train)
SVR()
Out[185]:
SVR()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
SVR()
In [187]:
pred_cv=model.predict(x_cv)
mse=mean_squared_error(y_cv,pred_cv)

pred_test=model.predict(test_aligned)

submission['SalePrice']=pred_test
submission['Id']=test_original['Id']


pd.DataFrame(submission,columns=['Id','SalePrice']).to_csv("SVR.csv")
In [193]:
from sklearn.model_selection import train_test_split
x_train,x_cv,y_train,y_cv=train_test_split(X,Y,test_size=0.3)
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error
model=DecisionTreeRegressor()
model.fit(x_train,y_train)
DecisionTreeRegressor()
Out[193]:
DecisionTreeRegressor()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
DecisionTreeRegressor()
In [194]:
pred_cv=model.predict(x_cv)
mse=mean_squared_error(y_cv,pred_cv)

pred_test=model.predict(test_aligned)

submission['SalePrice']=pred_test
submission['Id']=test_original['Id']


pd.DataFrame(submission,columns=['Id','SalePrice']).to_csv("DT.csv")
In [195]:
from sklearn.model_selection import train_test_split
x_train,x_cv,y_train,y_cv=train_test_split(X,Y,test_size=0.3)
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
model=RandomForestRegressor()
model.fit(x_train,y_train)
RandomForestRegressor()
Out[195]:
RandomForestRegressor()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
RandomForestRegressor()
In [196]:
pred_cv=model.predict(x_cv)
mse=mean_squared_error(y_cv,pred_cv)

pred_test=model.predict(test_aligned)

submission['SalePrice']=pred_test
submission['Id']=test_original['Id']


pd.DataFrame(submission,columns=['Id','SalePrice']).to_csv("Rf.csv")
In [197]:
from sklearn.model_selection import train_test_split
x_train,x_cv,y_train,y_cv=train_test_split(X,Y,test_size=0.3)
from sklearn.ensemble import AdaBoostRegressor
from sklearn.metrics import mean_squared_error
model=AdaBoostRegressor()
model.fit(x_train,y_train)
AdaBoostRegressor()
Out[197]:
AdaBoostRegressor()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
AdaBoostRegressor()
In [ ]:
 
In [198]:
pred_cv=model.predict(x_cv)
mse=mean_squared_error(y_cv,pred_cv)

pred_test=model.predict(test_aligned)

submission['SalePrice']=pred_test
submission['Id']=test_original['Id']


pd.DataFrame(submission,columns=['Id','SalePrice']).to_csv("AB.csv")
In [ ]:
 
In [199]:
from sklearn.model_selection import train_test_split
x_train,x_cv,y_train,y_cv=train_test_split(X,Y,test_size=0.3)
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.metrics import mean_squared_error
model=ExtraTreesRegressor()
model.fit(x_train,y_train)
ExtraTreesRegressor()
Out[199]:
ExtraTreesRegressor()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
ExtraTreesRegressor()
In [200]:
pred_cv=model.predict(x_cv)
mse=mean_squared_error(y_cv,pred_cv)

pred_test=model.predict(test_aligned)

submission['SalePrice']=pred_test
submission['Id']=test_original['Id']


pd.DataFrame(submission,columns=['Id','SalePrice']).to_csv("ET.csv")
In [201]:
from sklearn.model_selection import train_test_split
x_train,x_cv,y_train,y_cv=train_test_split(X,Y,test_size=0.3)
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error
model=GradientBoostingRegressor()
model.fit(x_train,y_train)
GradientBoostingRegressor()
Out[201]:
GradientBoostingRegressor()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
GradientBoostingRegressor()
In [202]:
pred_cv=model.predict(x_cv)
mse=mean_squared_error(y_cv,pred_cv)

pred_test=model.predict(test_aligned)

submission['SalePrice']=pred_test
submission['Id']=test_original['Id']


pd.DataFrame(submission,columns=['Id','SalePrice']).to_csv("GB.csv")
In [203]:
submission.to_csv('linear.csv',index=False)
from google.colab import files
files.download("linear.csv")
In [204]:
submission.to_csv("KNN.csv",index=False)
from google.colab import files
files.download("KNN.csv")
In [205]:
submission.to_csv("SVR.csv",index=False)
from google.colab import files
files.download("SVR.csv")
In [206]:
submission.to_csv("DT.csv",index=False)
from google.colab import files
files.download("DT.csv")
In [207]:
submission.to_csv("Rf.csv",index=False)
from google.colab import files
files.download("Rf.csv")
In [208]:
submission.to_csv("ET.csv",index=False)
from google.colab import files
files.download("ET.csv")
In [209]:
submission.to_csv("AB.csv",index=False)
from google.colab import files
files.download("AB.csv")
In [210]:
submission.to_csv("GB.csv",index=False)
from google.colab import files
files.download("GB.csv")
In [213]:
importances=pd.Series(model.feature_importances_,index=X.columns)
importances.plot(kind='barh',figsize=(80,80))
Out[213]:
<Axes: >
No description has been provided for this image
In [ ]:
from google.colab import files
uploaded=files.upload()
Upload widget is only available when the cell has been executed in the current browser session. Please rerun this cell to enable.
In [ ]: